"""
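Steps for scraping a web page:
    1. Choose the URL to scrape.
    2. Open that URL from Python (urlopen, etc.).
    3. Read the page content (via read()).
    4. Feed the content that was read into BeautifulSoup.
    5. Use BeautifulSoup to select tags and other information (instead of regular expressions).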
"""
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re

# Download the demo page. The context manager closes the HTTP response
# (and its underlying socket) as soon as the body has been read, instead of
# leaving it open until garbage collection.
with urlopen('https://mofanpy.com/static/scraping/list.html') as response:
    html = response.read().decode('utf-8')

# Parse the decoded HTML text with the lxml parser backend.
soup = BeautifulSoup(html, features='lxml')

# print(soup)
# print(soup.title)
# print(soup.p)


# Section 1: collect the link target of every hyperlink on the page.
# href=True restricts the search to <a> tags that actually carry an href
# attribute; indexing a tag without one (e.g. a named anchor) would raise
# KeyError.
all_href = [link['href'] for link in soup.find_all('a', href=True)]
# print('\n', all_href)

# Section 2: print the text of every <li> element whose class is "month".
month = soup.find_all('li', class_='month')
for m in month:
    month_text = m.get_text()
    print(month_text)


# Section 3: print the text of every <li> nested inside <ul class="jan">.
jan = soup.find('ul', {'class':'jan'})
# find() returns None when no element matches, so fall back to an empty
# list rather than failing with AttributeError on None.find_all.
d_jan = jan.find_all('li') if jan is not None else []
for d in d_jan:
    print(d.get_text())